#!/usr/bin/python
# -*- coding: utf-8 -*-

import requests
import os
import shutil
from pyquery import PyQuery

# Browser-like HTTP headers sent with every page request so the site does
# not reject the crawler as a bot; Referer matches the target domain.
headers = { "Accept":"text/html,application/xhtml+xml,application/xml;",
                    "Accept-Encoding":"gzip",
                    "Accept-Language":"zh-CN,zh;q=0.8",
                    "Referer":"http://www.xiaohuar.com/",
                    "User-Agent":"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.90 Safari/537.36"
                    }

def crawl_all_xiaohua():
    """Download all gallery photos from the paginated list into ./all_photo.

    Wipes any previous contents of the all_photo directory, recreates it,
    then crawls list pages 0..42 via crawl_photo().
    """
    path = os.path.join(os.getcwd(), "all_photo")

    # Remove the whole directory tree in one call instead of deleting its
    # entries one by one; ignore_errors keeps the original best-effort
    # behavior on files that cannot be removed.
    if os.path.exists(path):
        shutil.rmtree(path, ignore_errors=True)
        print("dir " + path + " removed!")

    # makedirs(exist_ok=True) avoids the check-then-create race of the
    # original exists()/mkdir pair.
    os.makedirs(path, exist_ok=True)

    # NOTE(review): pages appear to run 0..42 — confirm the site really
    # serves list-1-0.html; otherwise start at 1.
    for page in range(43):
        print("开始爬取第%s页图片\n" % (page))
        crawl_photo(page)

def crawl_photo(idx):
    """Download every photo on list page *idx* into ./all_photo.

    Each image is saved as "<alt text>.jpg". Any error while processing
    the page is printed and swallowed (best-effort crawl continues with
    the next page).
    """
    page_url = 'http://www.xiaohuar.com/list-1-%s.html' % (idx)
    try:
        path = os.path.join(os.getcwd(), "all_photo")
        r = requests.get(page_url, headers=headers, timeout=30)
        jq = PyQuery(r.text)
        for elem in jq('.item_t img'):
            item = PyQuery(elem)
            name = item.attr('alt')
            src = item.attr('src')
            if not name or not src:
                # attr() returns None for missing attributes; skip
                # malformed entries instead of crashing the whole page.
                continue
            # Use a distinct variable for the image URL — the original
            # clobbered the page URL inside the loop.
            pic_url = 'http://www.xiaohuar.com' + src
            # Send the same headers and a timeout as the page request;
            # the original omitted both, which can hang indefinitely or
            # get blocked by the server.
            pic = requests.get(pic_url, headers=headers, timeout=30)
            print(name)
            print(pic_url)
            pic_name = os.path.join(path, name)
            with open(pic_name + ".jpg", "wb") as fh:
                fh.write(pic.content)
            print(name + "下载成功\n")
    except Exception as ex:
        print(ex)

def crawl_all_college_xh():
    """Crawl the 2014 college index page and download photos per entry.

    Creates ./college_photo if needed, then follows every link in the
    news-box list and delegates to crawl_college_xh_photo() for each.
    Errors are printed and swallowed (best effort).
    """
    index_url = 'http://www.xiaohuar.com/2014.html'
    try:
        path = os.path.join(os.getcwd(), "college_photo")
        # exist_ok avoids the check-then-create race of exists()/mkdir.
        os.makedirs(path, exist_ok=True)

        req = requests.get(index_url, headers=headers, timeout=30)
        jq = PyQuery(req.text)
        for elem in jq('.newsboxlist a'):
            href = PyQuery(elem).attr('href')
            if not href:
                # attr() returns None for anchors without href; skip them
                # instead of passing None to requests.get downstream.
                continue
            crawl_college_xh_photo(href)
    except Exception as ex:
        print(ex)

def crawl_college_xh_photo(htmlUrl):
    """Download the first photo from a single college page *htmlUrl*.

    The image is saved as ./college_photo/<alt>/<alt>.jpg. Errors are
    printed and swallowed (best effort).
    """
    try:
        req = requests.get(htmlUrl, headers=headers, timeout=30)
        jq = PyQuery(req.text)
        for elem in jq('.content_wrap img'):
            item = PyQuery(elem)
            name = item.attr('alt')
            src = item.attr('src')
            if not name or not src:
                # Skip entries with missing alt/src instead of crashing
                # on None concatenation.
                continue
            pic_url = 'http://www.xiaohuar.com' + src

            # makedirs creates the college_photo parent too, so this
            # function also works when called standalone (the original
            # single-level mkdir would fail then).
            dir_path = os.path.join(os.getcwd(), "college_photo", name)
            os.makedirs(dir_path, exist_ok=True)

            # Same headers/timeout as the page request — the original
            # omitted both on the image download.
            pic = requests.get(pic_url, headers=headers, timeout=30)
            print(name)
            print(pic_url)
            pic_name = os.path.join(dir_path, name) + ".jpg"
            with open(pic_name, "wb") as fh:
                fh.write(pic.content)
            print(name + "下载成功\n")
            print("抓取：%s 结束\n" % (name))
            # NOTE(review): only the first image per page is fetched —
            # presumably the cover photo; confirm before removing.
            break
    except Exception as ex:
        print(ex)

if __name__ == '__main__':
    # Entry point: crawl the paginated gallery. The per-college crawler
    # below is an alternative entry point left disabled.
    crawl_all_xiaohua()
    # crawl_all_college_xh()
